Important note: the following (commented) code is for the local data exported from kallisto as I ran the entire analysis from scratch. To use the data from the GEO datasets, use code below (GEO data preparation).
rm(list = ls())
data_dir <- here::here("Data", "Fibroblasts", "ProcessedData")
library(limma)
library(Glimma)
library(edgeR)
library(tidyverse)
library(tximport)
library(DESeq2)
library(RColorBrewer)
genewizDir <- "/Users/annamonzel/Library/CloudStorage/GoogleDrive-asm2285@cumc.columbia.edu/.shortcut-targets-by-id/1pUjuYVHpRn5G2HeHNW2X0DBAVzvL-vMa/MitoLab - General/ Members Folders/Gabriel Sturm/Projects/Project 2- Cell Lifespan Aging/RNA/RNAseq/Genewiz/"
# Get metadata and samples included in analysis
samples <- read_csv(here::here("Data", "Fibroblasts", "OriginalData", "RNAseq_meta.csv")) %>%
mutate(tmp = "Sample_") %>%
unite(RNAseq_sampleID, tmp, RNAseq_sampleID, sep = "") %>%
as.data.frame() %>%
dplyr::select(RNAseq_sampleID, Cell_line_group, Clinical_condition, Treatments, Percent_oxygen, Unique_variable_name) %>%
separate(RNAseq_sampleID, into = c("Sample", "ID"), remove = F) %>%
mutate(ID = as.numeric(ID))
samples_all <- read_csv(here::here("Data", "Fibroblasts", "OriginalData", "RNAseq_meta.csv"))
length(unique(samples_all$Cell_line_inhouse))
## [1] 8
max(na.omit(samples_all$Passage))
## [1] 41
samples_to_read <- sort(as.numeric(samples$ID))
files <- file.path(genewizDir, 'kallisto_output_newv3', samples_to_read, "abundance.tsv")
names(files) <- paste0("Sample_", samples_to_read)
x = read.csv(here::here(genewizDir, 'kallisto_output_newv3/1/abundance.tsv'),sep='\t',header=T)
## summarize only across coding RNAs
mRNA <- x$target_id[which(grepl("mRNA", x$target_id)==TRUE)]
mtRNA <- x$target_id[which(grepl("CDS", x$target_id)==TRUE)]
mRNA_genes = sapply(lapply(strsplit(as.character(mRNA), '\\;'), 'rev'), '[', 2)
mtRNA_genes = sapply(lapply(strsplit(as.character(mtRNA), '\\;'), 'rev'), '[', 2)
tx2gene = data.frame(target_id = as.character(c(mRNA, mtRNA)), gene = c(mRNA_genes, mtRNA_genes))
## Important: order samples in counts dataset in the same order they are in the metadata (samples)
txi <- tximport(files, type = "kallisto",tx2gene=tx2gene, countsFromAbundance = "no")
gene_counts <- txi$counts[,samples$RNAseq_sampleID]
gene_abundances <- txi$abundance[,samples$RNAseq_sampleID]
gene_lengths <- txi$length[,samples$RNAseq_sampleID]
txi.tx <- tximport(files, type = "kallisto",tx2gene=tx2gene, txOut = TRUE) #countsFromAbundance = "lengthScaledTPM",
transcript_counts <- txi.tx$counts[,samples$RNAseq_sampleID]
transcript_abundances <- txi.tx$abundance[,samples$RNAseq_sampleID]
transcript_lengths <- txi.tx$length[,samples$RNAseq_sampleID]
dim(gene_counts)
## [1] 19378 345
dim(transcript_counts)
## [1] 81643 345
for_viz <- as.data.frame(transcript_counts) %>%
rownames_to_column("Genes") %>%
separate(Genes, into = c("REFSEQ1", "REFSEQ2", "SYMBOL", "TYPE"), sep = ";") %>%
pivot_longer(cols = -c("REFSEQ1", "REFSEQ2", "SYMBOL", "TYPE"), names_to = "RNAseq_sampleID", values_to = "exprs") %>%
full_join(samples, by = "RNAseq_sampleID") %>%
group_by(TYPE, RNAseq_sampleID) %>%
mutate(mean = mean(exprs)) %>%
ungroup() %>%
dplyr::select(TYPE, RNAseq_sampleID, mean) %>%
unique()
for_viz %>%
ggplot(aes(x = reorder(RNAseq_sampleID, mean), y = mean)) +
geom_col(mapping = NULL) +
facet_wrap(~TYPE, scales = "free") +
theme(legend.position = "none")
rm(samples_to_read, files, for_viz, x, tx2gene)
x <- DGEList(counts = gene_counts,
samples = samples,
group = samples$Cell_line_group
)
plotly::ggplotly(x$samples %>%
rownames_to_column("sample") %>%
unite(Condition, Clinical_condition, Treatments) %>%
ggplot(aes(x = reorder(sample, lib.size), y = lib.size, fill = Condition, label = Cell_line_group)) +
geom_bar(stat = "identity") +
labs(y = "Library size (total number of mapped and quantified reads)",
x = "Sample", fill = "Group") +
coord_flip())
https://www.nature.com/articles/s41467-023-41132-w
# rm(list = setdiff(ls(), c("samples", "genewizDir", "data_dir")))
# samples_to_read <- sort(as.numeric(samples$ID))
# files <- file.path(genewizDir, 'kallisto_output_newv3', samples_to_read, "abundance.tsv")
# names(files) <- paste0("Sample_", samples_to_read)
# x = read.csv(here::here(genewizDir, 'kallisto_output_newv3/1/abundance.tsv'),sep='\t',header=T)
# ## summarize only across coding RNAs
# mRNA <- x$target_id[which(grepl("mRNA", x$target_id)==TRUE)]
# mtRNA <- x$target_id[which(grepl("CDS", x$target_id)==TRUE)]
# mRNA_genes = sapply(lapply(strsplit(as.character(mRNA), '\\;'), 'rev'), '[', 2)
# mtRNA_genes = sapply(lapply(strsplit(as.character(mtRNA), '\\;'), 'rev'), '[', 2)
# tx2gene = data.frame(target_id = as.character(c(mRNA, mtRNA)), gene = c(mRNA_genes, mtRNA_genes))
#
# txi <- tximport(files, type = "kallisto",tx2gene=tx2gene, countsFromAbundance = "lengthScaledTPM")
# gene_counts <- txi$counts[,samples$RNAseq_sampleID]
# gene_abundances <- txi$abundance[,samples$RNAseq_sampleID]
# gene_lengths <- txi$length[,samples$RNAseq_sampleID]
#
#
# sapply(as.data.frame(gene_counts), sum)
# pTPM <- cpm(gene_counts)
# sapply(as.data.frame(pTPM), sum)
# #saveRDS(pTPM, here::here(data_save_dir,"pTPM_Lifespan.rds"))
#
# ref.samples <- samples %>%
# filter(Clinical_condition %in% "Normal") %>%
# filter(Treatments %in% "Control") %>%
# filter(Percent_oxygen %in% "21") %>%
# pull(RNAseq_sampleID)
# ref.column <- data.frame(Median = rowMedians(pTPM[,ref.samples]))
# rownames(ref.column) = rownames(pTPM)
# pTPM <- bind_cols(ref.column, pTPM)
#
# nTPM <- NOISeq::tmm(pTPM, refColumn = "Median")
# sapply(as.data.frame(nTPM), sum)
# nTPM <- nTPM[,-1]
#
# saveRDS(nTPM, here::here(data_dir,"nTPM_Lifespan.rds"))
# rm(list = setdiff(ls(), c("nTPM","keep.exprs", "samples", "genewizDir", "data_dir")))
Note: mtDNA genes in Mitocarta are “MT-XXX” and only “XXX” in kallisto. (changed here to match)
# genes_found <- rownames(nTPM)
#
# mitogenes <- readxl::read_xls(here::here("Data", "MitoCarta","OriginalData", "HumanMitoCarta3_0.xls"), sheet = 2) %>%
# pull(Symbol)
# mitogenes_mt <- mitogenes[grepl("MT-", mitogenes)]
# mitogenes_mt <- str_remove(mitogenes_mt, "MT-")
# mitogenes <- mitogenes[!grepl("MT-", mitogenes)]
# mitogenes <- c(mitogenes, mitogenes_mt)
#
# genes_found_mt <- unique(sort(genes_found[which(genes_found %in% mitogenes)]))
# length(genes_found_mt)
The following genes were not found
# mitogenes[which(!mitogenes%in% genes_found_mt)]
Note: MT-CO1, MT-CO2 and MT-CO3 are differently annotated in kallisto. Here are all mtDNA genes as annotated in Kallisto:
# samples_to_read <- sort(as.numeric(samples$ID))
# files <- file.path(genewizDir, 'kallisto_output_newv3', samples_to_read, "abundance.tsv")
# names(files) <- paste0("Sample_", samples_to_read)
# x = read.csv(here::here(genewizDir, 'kallisto_output_newv3/1/abundance.tsv'),sep='\t',header=T)
#
# ## summarize only across coding RNAs
# mRNA <- x$target_id[which(grepl("mRNA", x$target_id)==TRUE)]
# mtRNA <- x$target_id[which(grepl("CDS", x$target_id)==TRUE)]
# mRNA_genes = sapply(lapply(strsplit(as.character(mRNA), '\\;'), 'rev'), '[', 2)
# mtRNA_genes = sapply(lapply(strsplit(as.character(mtRNA), '\\;'), 'rev'), '[', 2)
# tx2gene = data.frame(target_id = as.character(c(mRNA, mtRNA)), gene = c(mRNA_genes, mtRNA_genes))
#
# ## Important: order samples in counts dataset in the same order they are in the metadata (samples)
# txi <- tximport(files, type = "kallisto",tx2gene=tx2gene, countsFromAbundance = "no")
# gene_counts <- txi$counts[,samples$RNAseq_sampleID]
# gene_abundances <- txi$abundance[,samples$RNAseq_sampleID]
# gene_lengths <- txi$length[,samples$RNAseq_sampleID]
#
# txi.tx <- tximport(files, type = "kallisto",tx2gene=tx2gene, txOut = TRUE)
# transcript_counts <- txi.tx$counts[,samples$RNAseq_sampleID]
# transcript_abundances <- txi.tx$abundance[,samples$RNAseq_sampleID]
# transcript_lengths <- txi.tx$length[,samples$RNAseq_sampleID]
#
#
# mtDNAgenes_in_data <- as.data.frame(transcript_counts) %>%
# rownames_to_column("Genes") %>%
# separate(Genes, into = c("REFSEQ1", "REFSEQ2", "SYMBOL", "TYPE"), sep = ";") %>%
# filter(TYPE %in% "CDS") %>% unique()
# mtDNAgenes_in_data$SYMBOL
This has to be changed in downstream analyses, but kept for now. I just search by mitogenes + CDS annotation in kallisto. The other two genes not found are MYG1, also known as C12orf10, and RP11_469A15.2, which is a Long intergenic non-coding RNA (lincRNA). C12orf10 could be found, but RP11_469A15.2 was excluded (only mtDNA-RNA and mRNA kept)
# genes_found <- rownames(nTPM)
# genes_found_mt <- unique(sort(genes_found[which(genes_found %in% c(mitogenes, mtDNAgenes_in_data$SYMBOL, "C12orf10"))]))
# length(genes_found_mt)
# rm(genes_found, genes_found_mt, mtDNAgenes_in_data, mitogenes, mitogenes_mt)
Run this if you downloaded the data from GEO. Note that the data on GEO was prepared by Sturm et al. 2022, with different R software and package versions. It might not 100% match the data prepared using the code above (kallisto, tximport). This does not affect the figures overall, but some statistics might differ slightly (e.g. the PCA variances). The raw data that was used above can be shared upon request.
gene_counts <- read_csv(here::here("Data/Fibroblasts/OriginalData", "GSE179848_txi_gene_count_lengthScaledTPM.csv")) %>%
pivot_longer(cols = -`...1`) %>%
separate(name, into = c("Sample", "Type"), sep = "_") %>%
select(-Type) %>%
mutate(Sample2 = "Sample") %>%
unite(Sample, Sample2, Sample, sep = "_") %>%
pivot_wider(names_from = Sample, values_from = value) %>%
column_to_rownames("...1")
samples <- read_csv(here::here("Data", "Fibroblasts", "OriginalData", "RNAseq_meta.csv")) %>%
mutate(tmp = "Sample_") %>%
unite(RNAseq_sampleID, tmp, RNAseq_sampleID, sep = "") %>%
as.data.frame() %>%
dplyr::select(RNAseq_sampleID, Cell_line_group, Clinical_condition, Treatments, Percent_oxygen, Unique_variable_name) %>%
separate(RNAseq_sampleID, into = c("Sample", "ID"), remove = F) %>%
mutate(ID = as.numeric(ID))
sapply(as.data.frame(gene_counts), sum)
## Sample_1 Sample_10 Sample_100 Sample_101 Sample_102 Sample_103 Sample_104 Sample_105 Sample_106 Sample_107 Sample_108 Sample_109 Sample_11 Sample_110 Sample_111
## 24121173 28068561 18073068 21862855 16161203 16240550 16164500 19652441 32521027 33830170 28052932 26585455 25770289 17017719 15988597
## Sample_112 Sample_113 Sample_114 Sample_115 Sample_116 Sample_117 Sample_118 Sample_119 Sample_12 Sample_120 Sample_121 Sample_122 Sample_123 Sample_124 Sample_125
## 15335512 15315648 32400378 29525720 16693851 17148961 16725654 14893595 29913771 14716611 32018294 14696524 16343487 15696374 16585264
## Sample_126 Sample_127 Sample_128 Sample_129 Sample_13 Sample_130 Sample_131 Sample_132 Sample_133 Sample_134 Sample_135 Sample_136 Sample_137 Sample_138 Sample_139
## 24360293 15692107 16759403 16321255 27046126 26393324 32720498 16775203 16034219 26026407 15769954 15189129 34322206 16464534 18286467
## Sample_14 Sample_140 Sample_141 Sample_142 Sample_143 Sample_144 Sample_145 Sample_146 Sample_147 Sample_148 Sample_149 Sample_15 Sample_150 Sample_151 Sample_152
## 23395123 16004589 32263526 16966592 28948725 16476002 15249094 16371617 31874629 15436643 31414557 22159800 17095270 15080323 17738300
## Sample_153 Sample_154 Sample_155 Sample_156 Sample_157 Sample_158 Sample_159 Sample_16 Sample_160 Sample_161 Sample_162 Sample_163 Sample_164 Sample_165 Sample_166
## 30673378 15455748 20348587 32376557 28415145 27872854 20984736 26778552 15379338 16507472 43030609 17022274 39533116 17397731 30162065
## Sample_167 Sample_168 Sample_169 Sample_17 Sample_170 Sample_171 Sample_172 Sample_173 Sample_174 Sample_175 Sample_176 Sample_177 Sample_178 Sample_179 Sample_18
## 25987017 17762789 32530009 21747913 35254527 34079826 16643159 31602177 16411681 36023102 28067664 37811275 27886856 57407880 24290246
## Sample_180 Sample_181 Sample_182 Sample_183 Sample_184 Sample_185 Sample_186 Sample_187 Sample_188 Sample_189 Sample_19 Sample_190 Sample_191 Sample_192 Sample_193
## 27736172 27442933 27748290 23988606 27504197 20234189 26439951 25808298 20443532 19811651 23315592 18651843 20115710 20035136 19060742
## Sample_194 Sample_195 Sample_196 Sample_197 Sample_198 Sample_199 Sample_2 Sample_20 Sample_200 Sample_201 Sample_202 Sample_203 Sample_204 Sample_205 Sample_206
## 18799928 20420732 21255619 20014220 19992651 18440425 28127609 25414941 20159125 17452643 19100341 20129452 19854538 18151660 17214698
## Sample_207 Sample_208 Sample_209 Sample_21 Sample_210 Sample_211 Sample_212 Sample_213 Sample_214 Sample_215 Sample_216 Sample_217 Sample_218 Sample_219 Sample_22
## 16682096 20822215 18524637 25313017 20581205 18611663 20733889 17923352 17219943 18384748 20427140 15210543 15731326 16702374 20890266
## Sample_220 Sample_221 Sample_222 Sample_223 Sample_224 Sample_225 Sample_226 Sample_227 Sample_228 Sample_229 Sample_23 Sample_230 Sample_231 Sample_232 Sample_233
## 18127190 15638143 15530759 16665951 20196311 14873291 15767471 18123730 17992631 15921825 20339367 15754577 16524941 17895006 15816164
## Sample_234 Sample_235 Sample_236 Sample_237 Sample_238 Sample_239 Sample_24 Sample_240 Sample_241 Sample_242 Sample_243 Sample_244 Sample_245 Sample_246 Sample_247
## 18338232 17576536 15503979 17452081 15825035 14384258 22948263 16632678 17219495 17207881 17597840 17333325 18158663 16742446 14623763
## Sample_248 Sample_249 Sample_25 Sample_250 Sample_251 Sample_252 Sample_253 Sample_254 Sample_255 Sample_256 Sample_257 Sample_258 Sample_259 Sample_26 Sample_260
## 16073443 15860384 24769372 16014772 24550069 25851549 21856005 21977244 20085904 19691262 25089446 24522304 27441698 25072003 25623197
## Sample_261 Sample_262 Sample_263 Sample_264 Sample_268 Sample_27 Sample_272 Sample_273 Sample_275 Sample_276 Sample_279 Sample_28 Sample_280 Sample_281 Sample_282
## 26285465 24913369 26211792 24992848 4871840 24134928 28282487 27148858 29160364 20056763 27881727 25872216 23042341 27777808 24764343
## Sample_283 Sample_284 Sample_285 Sample_286 Sample_287 Sample_288 Sample_289 Sample_29 Sample_290 Sample_291 Sample_292 Sample_293 Sample_294 Sample_295 Sample_296
## 30935252 30034579 29826367 23934403 24920362 22665784 23390959 23532515 25872662 23532542 25997477 24964728 23800680 24349863 23955621
## Sample_297 Sample_298 Sample_299 Sample_3 Sample_30 Sample_300 Sample_301 Sample_302 Sample_303 Sample_304 Sample_305 Sample_306 Sample_307 Sample_308 Sample_309
## 23519863 23931382 26800586 26481876 23624209 30092547 23293797 24948859 25560715 23572038 23862478 25300091 25100668 24328037 23556377
## Sample_31 Sample_310 Sample_311 Sample_312 Sample_313 Sample_314 Sample_315 Sample_316 Sample_317 Sample_318 Sample_319 Sample_32 Sample_320 Sample_321 Sample_322
## 22136186 22554461 25794488 22084665 28260912 28485758 26987403 28942634 26898974 26294935 28388535 25335513 26489800 22273644 21061531
## Sample_323 Sample_324 Sample_325 Sample_326 Sample_327 Sample_328 Sample_329 Sample_33 Sample_330 Sample_331 Sample_332 Sample_333 Sample_334 Sample_335 Sample_336
## 23672528 21236621 20726401 20335675 19817550 21911365 28672474 22519490 29551499 27203956 29177516 26751877 27382543 28412881 26956753
## Sample_337 Sample_338 Sample_339 Sample_34 Sample_340 Sample_341 Sample_342 Sample_343 Sample_344 Sample_345 Sample_346 Sample_347 Sample_348 Sample_349 Sample_35
## 30351182 25897012 25661723 22773589 25901057 22411852 23159050 25386451 22391816 25104267 24103925 24995733 25400273 23937203 22354147
## Sample_350 Sample_351 Sample_352 Sample_353 Sample_354 Sample_355 Sample_356 Sample_357 Sample_358 Sample_359 Sample_36 Sample_360 Sample_37 Sample_38 Sample_39
## 23644976 25677967 23471807 28039008 27191003 30974673 28955689 23375190 26204823 26723658 23605309 27538675 22547854 21808157 19702060
## Sample_4 Sample_40 Sample_41 Sample_42 Sample_43 Sample_44 Sample_45 Sample_46 Sample_47 Sample_48 Sample_49 Sample_5 Sample_50 Sample_51 Sample_52
## 27902063 22238066 26365273 27592302 24178420 25173832 22791654 23881267 21650451 23150224 25356326 26685556 28085624 23795269 26849718
## Sample_53 Sample_54 Sample_55 Sample_56 Sample_57 Sample_58 Sample_59 Sample_6 Sample_60 Sample_61 Sample_62 Sample_63 Sample_64 Sample_65 Sample_66
## 24411833 22972476 23903581 23152774 25015737 25633098 23223994 23656954 27207763 23339064 24617158 24381198 23086040 26229090 28633158
## Sample_67 Sample_68 Sample_69 Sample_7 Sample_70 Sample_71 Sample_72 Sample_73 Sample_74 Sample_75 Sample_76 Sample_77 Sample_78 Sample_79 Sample_8
## 26742024 30064215 27098721 21905857 26655321 24866231 24133480 24114391 28226413 26792238 28455857 27123339 25820258 25282890 28100329
## Sample_80 Sample_81 Sample_82 Sample_83 Sample_84 Sample_85 Sample_86 Sample_87 Sample_88 Sample_89 Sample_9 Sample_90 Sample_91 Sample_92 Sample_93
## 25246768 31065176 29904244 31663130 33096338 32257316 30259020 25439243 30127125 21868437 25147363 22166321 23633231 22934244 23246987
## Sample_94 Sample_95 Sample_96 Sample_97 Sample_98 Sample_99
## 21281789 20454092 19436759 17289795 17387049 22647903
pTPM <- cpm(gene_counts)
sapply(as.data.frame(pTPM), sum)
## Sample_1 Sample_10 Sample_100 Sample_101 Sample_102 Sample_103 Sample_104 Sample_105 Sample_106 Sample_107 Sample_108 Sample_109 Sample_11 Sample_110 Sample_111
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_112 Sample_113 Sample_114 Sample_115 Sample_116 Sample_117 Sample_118 Sample_119 Sample_12 Sample_120 Sample_121 Sample_122 Sample_123 Sample_124 Sample_125
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_126 Sample_127 Sample_128 Sample_129 Sample_13 Sample_130 Sample_131 Sample_132 Sample_133 Sample_134 Sample_135 Sample_136 Sample_137 Sample_138 Sample_139
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_14 Sample_140 Sample_141 Sample_142 Sample_143 Sample_144 Sample_145 Sample_146 Sample_147 Sample_148 Sample_149 Sample_15 Sample_150 Sample_151 Sample_152
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_153 Sample_154 Sample_155 Sample_156 Sample_157 Sample_158 Sample_159 Sample_16 Sample_160 Sample_161 Sample_162 Sample_163 Sample_164 Sample_165 Sample_166
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_167 Sample_168 Sample_169 Sample_17 Sample_170 Sample_171 Sample_172 Sample_173 Sample_174 Sample_175 Sample_176 Sample_177 Sample_178 Sample_179 Sample_18
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_180 Sample_181 Sample_182 Sample_183 Sample_184 Sample_185 Sample_186 Sample_187 Sample_188 Sample_189 Sample_19 Sample_190 Sample_191 Sample_192 Sample_193
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_194 Sample_195 Sample_196 Sample_197 Sample_198 Sample_199 Sample_2 Sample_20 Sample_200 Sample_201 Sample_202 Sample_203 Sample_204 Sample_205 Sample_206
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_207 Sample_208 Sample_209 Sample_21 Sample_210 Sample_211 Sample_212 Sample_213 Sample_214 Sample_215 Sample_216 Sample_217 Sample_218 Sample_219 Sample_22
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_220 Sample_221 Sample_222 Sample_223 Sample_224 Sample_225 Sample_226 Sample_227 Sample_228 Sample_229 Sample_23 Sample_230 Sample_231 Sample_232 Sample_233
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_234 Sample_235 Sample_236 Sample_237 Sample_238 Sample_239 Sample_24 Sample_240 Sample_241 Sample_242 Sample_243 Sample_244 Sample_245 Sample_246 Sample_247
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_248 Sample_249 Sample_25 Sample_250 Sample_251 Sample_252 Sample_253 Sample_254 Sample_255 Sample_256 Sample_257 Sample_258 Sample_259 Sample_26 Sample_260
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_261 Sample_262 Sample_263 Sample_264 Sample_268 Sample_27 Sample_272 Sample_273 Sample_275 Sample_276 Sample_279 Sample_28 Sample_280 Sample_281 Sample_282
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_283 Sample_284 Sample_285 Sample_286 Sample_287 Sample_288 Sample_289 Sample_29 Sample_290 Sample_291 Sample_292 Sample_293 Sample_294 Sample_295 Sample_296
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_297 Sample_298 Sample_299 Sample_3 Sample_30 Sample_300 Sample_301 Sample_302 Sample_303 Sample_304 Sample_305 Sample_306 Sample_307 Sample_308 Sample_309
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_31 Sample_310 Sample_311 Sample_312 Sample_313 Sample_314 Sample_315 Sample_316 Sample_317 Sample_318 Sample_319 Sample_32 Sample_320 Sample_321 Sample_322
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_323 Sample_324 Sample_325 Sample_326 Sample_327 Sample_328 Sample_329 Sample_33 Sample_330 Sample_331 Sample_332 Sample_333 Sample_334 Sample_335 Sample_336
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_337 Sample_338 Sample_339 Sample_34 Sample_340 Sample_341 Sample_342 Sample_343 Sample_344 Sample_345 Sample_346 Sample_347 Sample_348 Sample_349 Sample_35
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_350 Sample_351 Sample_352 Sample_353 Sample_354 Sample_355 Sample_356 Sample_357 Sample_358 Sample_359 Sample_36 Sample_360 Sample_37 Sample_38 Sample_39
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_4 Sample_40 Sample_41 Sample_42 Sample_43 Sample_44 Sample_45 Sample_46 Sample_47 Sample_48 Sample_49 Sample_5 Sample_50 Sample_51 Sample_52
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_53 Sample_54 Sample_55 Sample_56 Sample_57 Sample_58 Sample_59 Sample_6 Sample_60 Sample_61 Sample_62 Sample_63 Sample_64 Sample_65 Sample_66
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_67 Sample_68 Sample_69 Sample_7 Sample_70 Sample_71 Sample_72 Sample_73 Sample_74 Sample_75 Sample_76 Sample_77 Sample_78 Sample_79 Sample_8
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_80 Sample_81 Sample_82 Sample_83 Sample_84 Sample_85 Sample_86 Sample_87 Sample_88 Sample_89 Sample_9 Sample_90 Sample_91 Sample_92 Sample_93
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
## Sample_94 Sample_95 Sample_96 Sample_97 Sample_98 Sample_99
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
#saveRDS(pTPM, here::here(data_save_dir,"pTPM_Lifespan.rds"))
ref.samples <- samples %>%
dplyr::filter(Clinical_condition %in% "Normal") %>%
dplyr::filter(Treatments %in% "Control") %>%
dplyr::filter(Percent_oxygen %in% "21") %>%
pull(RNAseq_sampleID)
ref.column <- data.frame(Median = rowMedians(pTPM[,ref.samples]))
rownames(ref.column) = rownames(pTPM)
pTPM <- bind_cols(ref.column, pTPM)
nTPM <- NOISeq::tmm(pTPM, refColumn = "Median")
sapply(as.data.frame(nTPM), sum)
## Median Sample_1 Sample_10 Sample_100 Sample_101 Sample_102 Sample_103 Sample_104 Sample_105 Sample_106 Sample_107 Sample_108 Sample_109 Sample_11 Sample_110
## 991481.4 1050766.1 1021626.8 1059849.3 992136.4 1002143.8 1004410.5 1020797.6 1060170.2 1064022.3 939446.7 1004216.6 1025859.6 1081069.0 1034176.9
## Sample_111 Sample_112 Sample_113 Sample_114 Sample_115 Sample_116 Sample_117 Sample_118 Sample_119 Sample_12 Sample_120 Sample_121 Sample_122 Sample_123 Sample_124
## 923848.5 939013.6 933429.5 911295.5 1155071.8 1147535.3 1186344.6 1199752.0 856746.2 1071178.9 904099.4 986486.2 955686.8 951149.7 930700.0
## Sample_125 Sample_126 Sample_127 Sample_128 Sample_129 Sample_13 Sample_130 Sample_131 Sample_132 Sample_133 Sample_134 Sample_135 Sample_136 Sample_137 Sample_138
## 954388.2 966605.1 941739.6 964392.6 988886.9 1131013.8 983944.9 1013929.6 1037328.4 1063705.1 966052.5 949961.0 936483.2 935497.4 966344.8
## Sample_139 Sample_14 Sample_140 Sample_141 Sample_142 Sample_143 Sample_144 Sample_145 Sample_146 Sample_147 Sample_148 Sample_149 Sample_15 Sample_150 Sample_151
## 930953.7 1017601.4 980984.2 1008276.6 1038846.0 1037602.7 1005085.2 922887.2 1017146.6 1012301.8 992302.3 1057783.6 975887.1 1045695.6 976332.4
## Sample_152 Sample_153 Sample_154 Sample_155 Sample_156 Sample_157 Sample_158 Sample_159 Sample_16 Sample_160 Sample_161 Sample_162 Sample_163 Sample_164 Sample_165
## 1319097.4 1171766.9 1090194.4 1423246.5 1609946.3 1003877.4 1078730.9 1638355.5 943563.4 979891.7 1033709.3 1029057.5 862784.9 1095373.5 1190964.6
## Sample_166 Sample_167 Sample_168 Sample_169 Sample_17 Sample_170 Sample_171 Sample_172 Sample_173 Sample_174 Sample_175 Sample_176 Sample_177 Sample_178 Sample_179
## 1119450.3 892753.8 988951.9 921656.6 929149.4 920897.9 971562.2 954866.3 1335379.4 969426.2 932003.4 1281946.5 942553.9 930057.4 979582.6
## Sample_18 Sample_180 Sample_181 Sample_182 Sample_183 Sample_184 Sample_185 Sample_186 Sample_187 Sample_188 Sample_189 Sample_19 Sample_190 Sample_191 Sample_192
## 1026001.9 943665.3 962007.9 967698.7 951926.4 962286.0 985675.5 1055178.1 1021182.1 1025316.7 924228.0 1064841.1 968633.7 1015914.6 1009461.6
## Sample_193 Sample_194 Sample_195 Sample_196 Sample_197 Sample_198 Sample_199 Sample_2 Sample_20 Sample_200 Sample_201 Sample_202 Sample_203 Sample_204 Sample_205
## 973096.4 915891.6 999157.6 975334.9 1017110.4 1014185.5 981763.1 1127072.7 1052924.0 1017077.1 1002929.4 946570.3 939863.3 997164.4 936108.8
## Sample_206 Sample_207 Sample_208 Sample_209 Sample_21 Sample_210 Sample_211 Sample_212 Sample_213 Sample_214 Sample_215 Sample_216 Sample_217 Sample_218 Sample_219
## 978580.9 995164.6 1048434.5 997109.2 1026260.6 1053927.0 935633.4 988063.5 1045840.2 1033144.7 1054976.6 1062966.4 946389.8 905750.9 935392.3
## Sample_22 Sample_220 Sample_221 Sample_222 Sample_223 Sample_224 Sample_225 Sample_226 Sample_227 Sample_228 Sample_229 Sample_23 Sample_230 Sample_231 Sample_232
## 986706.6 934388.0 954049.4 897514.2 1002634.5 1053513.3 928321.3 952841.8 999801.4 978406.4 972211.2 1011741.1 921814.0 974164.8 1006459.9
## Sample_233 Sample_234 Sample_235 Sample_236 Sample_237 Sample_238 Sample_239 Sample_24 Sample_240 Sample_241 Sample_242 Sample_243 Sample_244 Sample_245 Sample_246
## 919243.3 924189.0 926176.8 1015612.9 920679.4 917318.9 896150.5 1023412.3 896050.7 883399.7 898822.1 871954.1 876832.2 1025349.8 931935.6
## Sample_247 Sample_248 Sample_249 Sample_25 Sample_250 Sample_251 Sample_252 Sample_253 Sample_254 Sample_255 Sample_256 Sample_257 Sample_258 Sample_259 Sample_26
## 900678.6 866820.0 881021.9 1059398.3 900418.9 904950.0 979308.0 911075.8 903931.6 895790.7 832252.2 969839.4 988993.4 1023860.5 1012316.5
## Sample_260 Sample_261 Sample_262 Sample_263 Sample_264 Sample_268 Sample_27 Sample_272 Sample_273 Sample_275 Sample_276 Sample_279 Sample_28 Sample_280 Sample_281
## 1005956.4 1013720.0 1132808.0 1080317.0 1041207.7 591009.2 1055125.3 1039794.2 1058765.5 1061412.7 934888.2 1058710.2 1067108.5 1030460.4 1065964.5
## Sample_282 Sample_283 Sample_284 Sample_285 Sample_286 Sample_287 Sample_288 Sample_289 Sample_29 Sample_290 Sample_291 Sample_292 Sample_293 Sample_294 Sample_295
## 1057949.6 1035406.9 1107847.8 1141781.6 1067305.3 947230.8 957321.8 925780.0 1053836.6 907061.2 911593.7 902483.5 912028.6 891579.4 927919.6
## Sample_296 Sample_297 Sample_298 Sample_299 Sample_3 Sample_30 Sample_300 Sample_301 Sample_302 Sample_303 Sample_304 Sample_305 Sample_306 Sample_307 Sample_308
## 951616.3 907157.5 932840.3 1005548.0 1115683.9 1090416.1 1017416.0 1008504.7 1025205.6 1007352.5 1013868.8 950287.2 976579.7 997119.3 1015931.0
## Sample_309 Sample_31 Sample_310 Sample_311 Sample_312 Sample_313 Sample_314 Sample_315 Sample_316 Sample_317 Sample_318 Sample_319 Sample_32 Sample_320 Sample_321
## 975437.1 1027479.8 957066.0 1017974.3 969672.6 1009472.5 986582.9 951723.5 992946.1 1012313.1 994100.9 1056228.2 957883.7 1016472.8 1042105.2
## Sample_322 Sample_323 Sample_324 Sample_325 Sample_326 Sample_327 Sample_328 Sample_329 Sample_33 Sample_330 Sample_331 Sample_332 Sample_333 Sample_334 Sample_335
## 1036409.1 1106093.3 1066129.6 1085573.8 1104581.4 1083780.1 1182702.2 983443.7 934435.5 956684.5 963981.8 977825.5 1020098.5 1024791.0 1019203.8
## Sample_336 Sample_337 Sample_338 Sample_339 Sample_34 Sample_340 Sample_341 Sample_342 Sample_343 Sample_344 Sample_345 Sample_346 Sample_347 Sample_348 Sample_349
## 1081725.2 1098705.4 1155634.5 930859.5 948243.1 887252.0 911957.3 919930.6 912450.1 914866.1 919987.2 930069.6 932189.7 958607.6 975561.7
## Sample_35 Sample_350 Sample_351 Sample_352 Sample_353 Sample_354 Sample_355 Sample_356 Sample_357 Sample_358 Sample_359 Sample_36 Sample_360 Sample_37 Sample_38
## 957835.6 968666.3 1004630.4 975995.7 901300.8 928833.4 1016147.8 972336.1 964149.6 952721.1 1000615.7 1005549.6 989689.6 1041849.4 1085709.2
## Sample_39 Sample_4 Sample_40 Sample_41 Sample_42 Sample_43 Sample_44 Sample_45 Sample_46 Sample_47 Sample_48 Sample_49 Sample_5 Sample_50 Sample_51
## 1031037.7 1022081.1 1064321.8 1102632.4 1095792.3 881066.4 943020.5 927983.0 975316.4 959968.0 952483.4 914579.4 1112519.2 1051944.1 1009887.8
## Sample_52 Sample_53 Sample_54 Sample_55 Sample_56 Sample_57 Sample_58 Sample_59 Sample_6 Sample_60 Sample_61 Sample_62 Sample_63 Sample_64 Sample_65
## 1038861.1 1148191.4 1118094.4 1072673.8 1036009.8 1031868.7 1029098.4 986829.3 988950.2 1131431.3 1023985.8 1089695.9 1081765.7 1182696.4 960315.0
## Sample_66 Sample_67 Sample_68 Sample_69 Sample_7 Sample_70 Sample_71 Sample_72 Sample_73 Sample_74 Sample_75 Sample_76 Sample_77 Sample_78 Sample_79
## 1046059.1 1104412.5 1108508.8 1181114.2 963859.8 1063963.5 958256.8 1006708.5 953222.5 1079888.5 984256.0 1005736.6 1054483.2 1030095.1 1061619.7
## Sample_8 Sample_80 Sample_81 Sample_82 Sample_83 Sample_84 Sample_85 Sample_86 Sample_87 Sample_88 Sample_89 Sample_9 Sample_90 Sample_91 Sample_92
## 1050176.5 1064412.7 1023303.7 915363.3 950365.2 956201.3 987311.8 981801.8 986229.9 1029117.8 1009442.5 1018277.3 1039161.9 1030798.4 985900.3
## Sample_93 Sample_94 Sample_95 Sample_96 Sample_97 Sample_98 Sample_99
## 1083904.0 899046.8 965621.0 979753.6 1013333.1 1006696.0 963602.1
nTPM <- nTPM[,-1]
saveRDS(nTPM, here::here("Data/Fibroblasts/ProcessedData", "nTPM_Lifespan.rds"))
data_nTPM <- nTPM %>%
as.data.frame() %>%
rownames_to_column("Gene") %>%
pivot_longer(cols = -Gene, names_to = "RNAseq_sampleID", values_to = "nTPM")
mitocarta <- readxl::read_xls(here::here("Data", "MitoCarta", "OriginalData",
"HumanMitoCarta3_0.xls"), sheet = 4) %>%
select(MitoPathway, Genes) %>%
na.omit()
gene_to_pathway <- splitstackshape::cSplit(mitocarta, 'Genes', ',') %>%
column_to_rownames("MitoPathway") %>%
t() %>%
as.data.frame()
gene_to_pathway <- gene_to_pathway %>%
pivot_longer(cols = colnames(gene_to_pathway), names_to = "Pathway", values_to = "Gene") %>%
na.omit()
## Mitogenes in data
mitocarta <- readxl::read_xls(here::here("Data", "MitoCarta", "OriginalData","HumanMitoCarta3_0.xls"), sheet = 2)
mitogenes <- mitocarta$Symbol
all_genes <- unique(data_nTPM$Gene)
mitogenes_in_data <- sort(mitogenes[which(mitogenes %in% all_genes)])
length(mitogenes_in_data)
## [1] 1121
mitogenes_not_in_data <- mitogenes[which(!mitogenes %in% mitogenes_in_data)]
mitogenes_not_in_data
## [1] "MT-ATP6" "MT-CO2" "MT-CO1" "MT-ND2" "MT-ND4" "MT-ND5" "MT-CYB" "MT-ATP8" "MT-CO3" "MT-ND3"
## [11] "MT-ND1" "MT-ND4L" "MYG1" "MT-ND6" "RP11_469A15.2"
manual_mitogene_pull <- c("ND1", "ATP6", "COX2", "COX1", "ND2", "ND4", "ND5", "CYTB", "ATP8", "COX3", "ND3", "ND4L", "C12orf10", "ND6", "RP11_469A15.2")
all_mitogenes_in_data <- c(mitogenes[which(!mitogenes %in% mitogenes_not_in_data)], manual_mitogene_pull[which(!manual_mitogene_pull %in% "RP11_469A15.2")])
data_mito <- data_nTPM %>%
filter(Gene %in% all_mitogenes_in_data ) %>%
mutate(Gene = case_when(
# renaming to match with mitocarta
Gene == "C12orf10" ~ "MYG1",
Gene == "ND1" ~ "MT-ND1",
Gene == "ND2" ~ "MT-ND2",
Gene == "COX1" ~ "MT-CO1",
Gene == "COX2" ~ "MT-CO2",
Gene == "ATP8" ~ "MT-ATP8",
Gene == "ATP6" ~ "MT-ATP6",
Gene == "COX3" ~ "MT-CO3",
Gene == "ND3" ~ "MT-ND3",
Gene == "ND4L" ~ "MT-ND4L",
Gene == "ND4" ~ "MT-ND4",
Gene == "ND5" ~ "MT-ND5",
Gene == "ND6" ~ "MT-ND6",
Gene == "CYTB" ~ "MT-CYB",
TRUE ~ Gene
)) %>%
mutate(Genome = case_when(
grepl("MT-", Gene) ~ "mtDNA",
TRUE~"NucDNA"
))
write_csv(data_mito, here::here("Data","Fibroblasts", "ProcessedData", "RNAseqData_MitoGenes.csv"))